"""
@author: yqx
@file: date_gain.py
@time: 2024/6/18 19:24
"""

import csv

from bs4 import BeautifulSoup

import data_get_url as geturl
"""
获取职位
"""
urls="https://www.zhipin.com/"
text=geturl.download(urls)
    # 如果请求失败，则抛出HTTPError异常
    # response.raise_for_status()
#网页所有数据
soup=BeautifulSoup(text,'lxml')
# print(soup)
#获取html存在的所有职位及职位分类数据
# job_type:包括了所有需要的数据
job_type=soup.find_all(class_="menu-sub")
# print(job_type)
import csv
with open("data_position.csv",'w',encoding='utf-8')as wr:
    #创建一个写入csv文件的对象
    csvwriter=csv.writer(wr)
    #创建索引
    headers=["行业","方向","职位"]
    #写入索引
    csvwriter.writerow(headers)
    for industry in job_type:#industry 行业
        # print(industry)
        ct1=industry.find_all('li')
        # print(ts1)
        #indu 行业的名字
        indu = industry.find('p').get_text(strip=True)
        # print(indu)
        for direction in ct1:#direction 行业中的方向
            # print(direction)
            dire=direction.find('h4').get_text(strip=True)
            # print(dire)
            ct2=direction.find_all('a')
            # print(pro)
            for profession in ct2:#profession 职位
                prof=profession.get_text(strip=True)
                # print(prof)
                row_data=[indu,dire,prof]
                csvwriter.writerow(row_data)



# https://www.zhipin.com/web/geek/job?city=101010100&position=100901

